1
2 package org.apache.lucene.queryparser.surround.parser;
3
4 import java.util.ArrayList;
5 import java.util.List;
6 import java.io.StringReader;
7
8
9 import org.apache.lucene.analysis.TokenStream;
10
11 import org.apache.lucene.queryparser.surround.query.SrndQuery;
12 import org.apache.lucene.queryparser.surround.query.FieldsQuery;
13 import org.apache.lucene.queryparser.surround.query.OrQuery;
14 import org.apache.lucene.queryparser.surround.query.AndQuery;
15 import org.apache.lucene.queryparser.surround.query.NotQuery;
16 import org.apache.lucene.queryparser.surround.query.DistanceQuery;
17 import org.apache.lucene.queryparser.surround.query.SrndTermQuery;
18 import org.apache.lucene.queryparser.surround.query.SrndPrefixQuery;
19 import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46 public class QueryParser implements QueryParserConstants {
47 final int minimumPrefixLength = 3;
48 final int minimumCharsInTrunc = 3;
49 final String truncationErrorMessage = "Too unrestrictive truncation: ";
50 final String boostErrorMessage = "Cannot handle boost value: ";
51
52
53 final char truncator = '*';
54 final char anyChar = '?';
55 final char quote = '"';
56 final char fieldOperator = ':';
57 final char comma = ',';
58 final char carat = '^';
59
60 static public SrndQuery parse(String query) throws ParseException {
61 QueryParser parser = new QueryParser();
62 return parser.parse2(query);
63 }
64
/**
 * Creates a parser over an empty input; {@link #parse2(String)} re-initializes it
 * with the real query text.
 */
public QueryParser() {
  this(
      new FastCharStream(
          new StringReader("")));
}
68
69 public SrndQuery parse2(String query) throws ParseException {
70 ReInit(new FastCharStream(new StringReader(query)));
71 try {
72 return TopSrndQuery();
73 } catch (TokenMgrError tme) {
74 throw new ParseException(tme.getMessage());
75 }
76 }
77
78 protected SrndQuery getFieldsQuery(
79 SrndQuery q, ArrayList<String> fieldNames) {
80
81
82
83 return new FieldsQuery(q, fieldNames, fieldOperator);
84 }
85
86 protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) {
87 return new OrQuery(queries, infix, orToken.image);
88 }
89
90 protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) {
91 return new AndQuery( queries, infix, andToken.image);
92 }
93
94 protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) {
95 return new NotQuery( queries, notToken.image);
96 }
97
98 protected static int getOpDistance(String distanceOp) {
99
100 return distanceOp.length() == 1
101 ? 1
102 : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1));
103 }
104
105 protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
106 throws ParseException {
107 String m = distq.distanceSubQueryNotAllowed();
108 if (m != null) {
109 throw new ParseException("Operator " + opName + ": " + m);
110 }
111 }
112
113 protected SrndQuery getDistanceQuery(
114 List<SrndQuery> queries,
115 boolean infix,
116 Token dToken,
117 boolean ordered) throws ParseException {
118 DistanceQuery dq = new DistanceQuery(queries,
119 infix,
120 getOpDistance(dToken.image),
121 dToken.image,
122 ordered);
123 checkDistanceSubQueries(dq, dToken.image);
124 return dq;
125 }
126
127 protected SrndQuery getTermQuery(
128 String term, boolean quoted) {
129 return new SrndTermQuery(term, quoted);
130 }
131
132 protected boolean allowedSuffix(String suffixed) {
133 return (suffixed.length() - 1) >= minimumPrefixLength;
134 }
135
136 protected SrndQuery getPrefixQuery(
137 String prefix, boolean quoted) {
138 return new SrndPrefixQuery(prefix, quoted, truncator);
139 }
140
141 protected boolean allowedTruncation(String truncated) {
142
143 int nrNormalChars = 0;
144 for (int i = 0; i < truncated.length(); i++) {
145 char c = truncated.charAt(i);
146 if ((c != truncator) && (c != anyChar)) {
147 nrNormalChars++;
148 }
149 }
150 return nrNormalChars >= minimumCharsInTrunc;
151 }
152
153 protected SrndQuery getTruncQuery(String truncated) {
154 return new SrndTruncQuery(truncated, truncator, anyChar);
155 }
156
157 final public SrndQuery TopSrndQuery() throws ParseException {
158 SrndQuery q;
159 q = FieldsQuery();
160 jj_consume_token(0);
161 {if (true) return q;}
162 throw new Error("Missing return statement in function");
163 }
164
165 final public SrndQuery FieldsQuery() throws ParseException {
166 SrndQuery q;
167 ArrayList<String> fieldNames;
168 fieldNames = OptionalFields();
169 q = OrQuery();
170 {if (true) return (fieldNames == null) ? q : getFieldsQuery(q, fieldNames);}
171 throw new Error("Missing return statement in function");
172 }
173
174 final public ArrayList<String> OptionalFields() throws ParseException {
175 Token fieldName;
176 ArrayList<String> fieldNames = null;
177 label_1:
178 while (true) {
179 if (jj_2_1(2)) {
180 ;
181 } else {
182 break label_1;
183 }
184
185 fieldName = jj_consume_token(TERM);
186 jj_consume_token(COLON);
187 if (fieldNames == null) {
188 fieldNames = new ArrayList<String>();
189 }
190 fieldNames.add(fieldName.image);
191 }
192 {if (true) return fieldNames;}
193 throw new Error("Missing return statement in function");
194 }
195
196 final public SrndQuery OrQuery() throws ParseException {
197 SrndQuery q;
198 ArrayList<SrndQuery> queries = null;
199 Token oprt = null;
200 q = AndQuery();
201 label_2:
202 while (true) {
203 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
204 case OR:
205 ;
206 break;
207 default:
208 jj_la1[0] = jj_gen;
209 break label_2;
210 }
211 oprt = jj_consume_token(OR);
212
213 if (queries == null) {
214 queries = new ArrayList<SrndQuery>();
215 queries.add(q);
216 }
217 q = AndQuery();
218 queries.add(q);
219 }
220 {if (true) return (queries == null) ? q : getOrQuery(queries, true , oprt);}
221 throw new Error("Missing return statement in function");
222 }
223
224 final public SrndQuery AndQuery() throws ParseException {
225 SrndQuery q;
226 ArrayList<SrndQuery> queries = null;
227 Token oprt = null;
228 q = NotQuery();
229 label_3:
230 while (true) {
231 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
232 case AND:
233 ;
234 break;
235 default:
236 jj_la1[1] = jj_gen;
237 break label_3;
238 }
239 oprt = jj_consume_token(AND);
240
241 if (queries == null) {
242 queries = new ArrayList<SrndQuery>();
243 queries.add(q);
244 }
245 q = NotQuery();
246 queries.add(q);
247 }
248 {if (true) return (queries == null) ? q : getAndQuery(queries, true , oprt);}
249 throw new Error("Missing return statement in function");
250 }
251
252 final public SrndQuery NotQuery() throws ParseException {
253 SrndQuery q;
254 ArrayList<SrndQuery> queries = null;
255 Token oprt = null;
256 q = NQuery();
257 label_4:
258 while (true) {
259 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
260 case NOT:
261 ;
262 break;
263 default:
264 jj_la1[2] = jj_gen;
265 break label_4;
266 }
267 oprt = jj_consume_token(NOT);
268
269 if (queries == null) {
270 queries = new ArrayList<SrndQuery>();
271 queries.add(q);
272 }
273 q = NQuery();
274 queries.add(q);
275 }
276 {if (true) return (queries == null) ? q : getNotQuery(queries, oprt);}
277 throw new Error("Missing return statement in function");
278 }
279
280 final public SrndQuery NQuery() throws ParseException {
281 SrndQuery q;
282 ArrayList<SrndQuery> queries;
283 Token dt;
284 q = WQuery();
285 label_5:
286 while (true) {
287 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
288 case N:
289 ;
290 break;
291 default:
292 jj_la1[3] = jj_gen;
293 break label_5;
294 }
295 dt = jj_consume_token(N);
296 queries = new ArrayList<SrndQuery>();
297 queries.add(q);
298
299 q = WQuery();
300 queries.add(q);
301 q = getDistanceQuery(queries, true , dt, false );
302 }
303 {if (true) return q;}
304 throw new Error("Missing return statement in function");
305 }
306
307 final public SrndQuery WQuery() throws ParseException {
308 SrndQuery q;
309 ArrayList<SrndQuery> queries;
310 Token wt;
311 q = PrimaryQuery();
312 label_6:
313 while (true) {
314 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
315 case W:
316 ;
317 break;
318 default:
319 jj_la1[4] = jj_gen;
320 break label_6;
321 }
322 wt = jj_consume_token(W);
323 queries = new ArrayList<SrndQuery>();
324 queries.add(q);
325
326 q = PrimaryQuery();
327 queries.add(q);
328 q = getDistanceQuery(queries, true , wt, true );
329 }
330 {if (true) return q;}
331 throw new Error("Missing return statement in function");
332 }
333
334 final public SrndQuery PrimaryQuery() throws ParseException {
335
336 SrndQuery q;
337 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
338 case LPAREN:
339 jj_consume_token(LPAREN);
340 q = FieldsQuery();
341 jj_consume_token(RPAREN);
342 break;
343 case OR:
344 case AND:
345 case W:
346 case N:
347 q = PrefixOperatorQuery();
348 break;
349 case TRUNCQUOTED:
350 case QUOTED:
351 case SUFFIXTERM:
352 case TRUNCTERM:
353 case TERM:
354 q = SimpleTerm();
355 break;
356 default:
357 jj_la1[5] = jj_gen;
358 jj_consume_token(-1);
359 throw new ParseException();
360 }
361 OptionalWeights(q);
362 {if (true) return q;}
363 throw new Error("Missing return statement in function");
364 }
365
366 final public SrndQuery PrefixOperatorQuery() throws ParseException {
367 Token oprt;
368 List<SrndQuery> queries;
369 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
370 case OR:
371 oprt = jj_consume_token(OR);
372
373 queries = FieldsQueryList();
374 {if (true) return getOrQuery(queries, false , oprt);}
375 break;
376 case AND:
377 oprt = jj_consume_token(AND);
378
379 queries = FieldsQueryList();
380 {if (true) return getAndQuery(queries, false , oprt);}
381 break;
382 case N:
383 oprt = jj_consume_token(N);
384
385 queries = FieldsQueryList();
386 {if (true) return getDistanceQuery(queries, false , oprt, false );}
387 break;
388 case W:
389 oprt = jj_consume_token(W);
390
391 queries = FieldsQueryList();
392 {if (true) return getDistanceQuery(queries, false , oprt, true );}
393 break;
394 default:
395 jj_la1[6] = jj_gen;
396 jj_consume_token(-1);
397 throw new ParseException();
398 }
399 throw new Error("Missing return statement in function");
400 }
401
402 final public List<SrndQuery> FieldsQueryList() throws ParseException {
403 SrndQuery q;
404 ArrayList<SrndQuery> queries = new ArrayList<SrndQuery>();
405 jj_consume_token(LPAREN);
406 q = FieldsQuery();
407 queries.add(q);
408 label_7:
409 while (true) {
410 jj_consume_token(COMMA);
411 q = FieldsQuery();
412 queries.add(q);
413 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
414 case COMMA:
415 ;
416 break;
417 default:
418 jj_la1[7] = jj_gen;
419 break label_7;
420 }
421 }
422 jj_consume_token(RPAREN);
423 {if (true) return queries;}
424 throw new Error("Missing return statement in function");
425 }
426
427 final public SrndQuery SimpleTerm() throws ParseException {
428 Token term;
429 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
430 case TERM:
431 term = jj_consume_token(TERM);
432 {if (true) return getTermQuery(term.image, false );}
433 break;
434 case QUOTED:
435 term = jj_consume_token(QUOTED);
436 {if (true) return getTermQuery(term.image.substring(1, term.image.length()-1), true );}
437 break;
438 case SUFFIXTERM:
439 term = jj_consume_token(SUFFIXTERM);
440
441 if (! allowedSuffix(term.image)) {
442 {if (true) throw new ParseException(truncationErrorMessage + term.image);}
443 }
444 {if (true) return getPrefixQuery(term.image.substring(0, term.image.length()-1), false );}
445 break;
446 case TRUNCTERM:
447 term = jj_consume_token(TRUNCTERM);
448
449 if (! allowedTruncation(term.image)) {
450 {if (true) throw new ParseException(truncationErrorMessage + term.image);}
451 }
452 {if (true) return getTruncQuery(term.image);}
453 break;
454 case TRUNCQUOTED:
455 term = jj_consume_token(TRUNCQUOTED);
456
457 if ((term.image.length() - 3) < minimumPrefixLength) {
458 {if (true) throw new ParseException(truncationErrorMessage + term.image);}
459 }
460 {if (true) return getPrefixQuery(term.image.substring(1, term.image.length()-2), true );}
461 break;
462 default:
463 jj_la1[8] = jj_gen;
464 jj_consume_token(-1);
465 throw new ParseException();
466 }
467 throw new Error("Missing return statement in function");
468 }
469
470 final public void OptionalWeights(SrndQuery q) throws ParseException {
471 Token weight=null;
472 label_8:
473 while (true) {
474 switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
475 case CARAT:
476 ;
477 break;
478 default:
479 jj_la1[9] = jj_gen;
480 break label_8;
481 }
482 jj_consume_token(CARAT);
483 weight = jj_consume_token(NUMBER);
484 float f;
485 try {
486 f = Float.valueOf(weight.image).floatValue();
487 } catch (Exception floatExc) {
488 {if (true) throw new ParseException(boostErrorMessage + weight.image + " (" + floatExc + ")");}
489 }
490 if (f <= 0.0) {
491 {if (true) throw new ParseException(boostErrorMessage + weight.image);}
492 }
493 q.setWeight(f * q.getWeight());
494
495 }
496 }
497
498 private boolean jj_2_1(int xla) {
499 jj_la = xla; jj_lastpos = jj_scanpos = token;
500 try { return !jj_3_1(); }
501 catch(LookaheadSuccess ls) { return true; }
502 finally { jj_save(0, xla); }
503 }
504
505 private boolean jj_3_1() {
506 if (jj_scan_token(TERM)) return true;
507 if (jj_scan_token(COLON)) return true;
508 return false;
509 }
510
511
512 public QueryParserTokenManager token_source;
513
514 public Token token;
515
516 public Token jj_nt;
517 private int jj_ntk;
518 private Token jj_scanpos, jj_lastpos;
519 private int jj_la;
520 private int jj_gen;
521 final private int[] jj_la1 = new int[10];
522 static private int[] jj_la1_0;
523 static {
524 jj_la1_init_0();
525 }
526 private static void jj_la1_init_0() {
527 jj_la1_0 = new int[] {0x100,0x200,0x400,0x1000,0x800,0x7c3b00,0x1b00,0x8000,0x7c0000,0x20000,};
528 }
529 final private JJCalls[] jj_2_rtns = new JJCalls[1];
530 private boolean jj_rescan = false;
531 private int jj_gc = 0;
532
533
/**
 * Creates a parser that reads from the given character stream through a new token
 * manager.
 *
 * @param stream the input
 */
public QueryParser(CharStream stream) {
  token_source = new QueryParserTokenManager(stream);
  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;
  java.util.Arrays.fill(jj_la1, -1);
  for (int slot = 0; slot < jj_2_rtns.length; slot++) {
    jj_2_rtns[slot] = new JJCalls();
  }
}
542
543
544 public void ReInit(CharStream stream) {
545 token_source.ReInit(stream);
546 token = new Token();
547 jj_ntk = -1;
548 jj_gen = 0;
549 for (int i = 0; i < 10; i++) jj_la1[i] = -1;
550 for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
551 }
552
553
/**
 * Creates a parser on top of an existing token manager.
 *
 * @param tm the token manager to read tokens from
 */
public QueryParser(QueryParserTokenManager tm) {
  token_source = tm;
  token = new Token();
  jj_ntk = -1;
  jj_gen = 0;
  java.util.Arrays.fill(jj_la1, -1);
  for (int slot = 0; slot < jj_2_rtns.length; slot++) {
    jj_2_rtns[slot] = new JJCalls();
  }
}
562
563
564 public void ReInit(QueryParserTokenManager tm) {
565 token_source = tm;
566 token = new Token();
567 jj_ntk = -1;
568 jj_gen = 0;
569 for (int i = 0; i < 10; i++) jj_la1[i] = -1;
570 for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
571 }
572
573 private Token jj_consume_token(int kind) throws ParseException {
574 Token oldToken;
575 if ((oldToken = token).next != null) token = token.next;
576 else token = token.next = token_source.getNextToken();
577 jj_ntk = -1;
578 if (token.kind == kind) {
579 jj_gen++;
580 if (++jj_gc > 100) {
581 jj_gc = 0;
582 for (int i = 0; i < jj_2_rtns.length; i++) {
583 JJCalls c = jj_2_rtns[i];
584 while (c != null) {
585 if (c.gen < jj_gen) c.first = null;
586 c = c.next;
587 }
588 }
589 }
590 return token;
591 }
592 token = oldToken;
593 jj_kind = kind;
594 throw generateParseException();
595 }
596
597 static private final class LookaheadSuccess extends java.lang.Error { }
598 final private LookaheadSuccess jj_ls = new LookaheadSuccess();
599 private boolean jj_scan_token(int kind) {
600 if (jj_scanpos == jj_lastpos) {
601 jj_la--;
602 if (jj_scanpos.next == null) {
603 jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken();
604 } else {
605 jj_lastpos = jj_scanpos = jj_scanpos.next;
606 }
607 } else {
608 jj_scanpos = jj_scanpos.next;
609 }
610 if (jj_rescan) {
611 int i = 0; Token tok = token;
612 while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; }
613 if (tok != null) jj_add_error_token(kind, i);
614 }
615 if (jj_scanpos.kind != kind) return true;
616 if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls;
617 return false;
618 }
619
620
621
622 final public Token getNextToken() {
623 if (token.next != null) token = token.next;
624 else token = token.next = token_source.getNextToken();
625 jj_ntk = -1;
626 jj_gen++;
627 return token;
628 }
629
630
631 final public Token getToken(int index) {
632 Token t = token;
633 for (int i = 0; i < index; i++) {
634 if (t.next != null) t = t.next;
635 else t = t.next = token_source.getNextToken();
636 }
637 return t;
638 }
639
640 private int jj_ntk() {
641 if ((jj_nt=token.next) == null)
642 return (jj_ntk = (token.next=token_source.getNextToken()).kind);
643 else
644 return (jj_ntk = jj_nt.kind);
645 }
646
647 private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
648 private int[] jj_expentry;
649 private int jj_kind = -1;
650 private int[] jj_lasttokens = new int[100];
651 private int jj_endpos;
652
653 private void jj_add_error_token(int kind, int pos) {
654 if (pos >= 100) return;
655 if (pos == jj_endpos + 1) {
656 jj_lasttokens[jj_endpos++] = kind;
657 } else if (jj_endpos != 0) {
658 jj_expentry = new int[jj_endpos];
659 for (int i = 0; i < jj_endpos; i++) {
660 jj_expentry[i] = jj_lasttokens[i];
661 }
662 jj_entries_loop: for (java.util.Iterator<?> it = jj_expentries.iterator(); it.hasNext();) {
663 int[] oldentry = (int[])(it.next());
664 if (oldentry.length == jj_expentry.length) {
665 for (int i = 0; i < jj_expentry.length; i++) {
666 if (oldentry[i] != jj_expentry[i]) {
667 continue jj_entries_loop;
668 }
669 }
670 jj_expentries.add(jj_expentry);
671 break jj_entries_loop;
672 }
673 }
674 if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
675 }
676 }
677
678
679 public ParseException generateParseException() {
680 jj_expentries.clear();
681 boolean[] la1tokens = new boolean[24];
682 if (jj_kind >= 0) {
683 la1tokens[jj_kind] = true;
684 jj_kind = -1;
685 }
686 for (int i = 0; i < 10; i++) {
687 if (jj_la1[i] == jj_gen) {
688 for (int j = 0; j < 32; j++) {
689 if ((jj_la1_0[i] & (1<<j)) != 0) {
690 la1tokens[j] = true;
691 }
692 }
693 }
694 }
695 for (int i = 0; i < 24; i++) {
696 if (la1tokens[i]) {
697 jj_expentry = new int[1];
698 jj_expentry[0] = i;
699 jj_expentries.add(jj_expentry);
700 }
701 }
702 jj_endpos = 0;
703 jj_rescan_token();
704 jj_add_error_token(0, 0);
705 int[][] exptokseq = new int[jj_expentries.size()][];
706 for (int i = 0; i < jj_expentries.size(); i++) {
707 exptokseq[i] = jj_expentries.get(i);
708 }
709 return new ParseException(token, exptokseq, tokenImage);
710 }
711
712
713 final public void enable_tracing() {
714 }
715
716
717 final public void disable_tracing() {
718 }
719
720 private void jj_rescan_token() {
721 jj_rescan = true;
722 for (int i = 0; i < 1; i++) {
723 try {
724 JJCalls p = jj_2_rtns[i];
725 do {
726 if (p.gen > jj_gen) {
727 jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
728 switch (i) {
729 case 0: jj_3_1(); break;
730 }
731 }
732 p = p.next;
733 } while (p != null);
734 } catch(LookaheadSuccess ls) { }
735 }
736 jj_rescan = false;
737 }
738
739 private void jj_save(int index, int xla) {
740 JJCalls p = jj_2_rtns[index];
741 while (p.gen > jj_gen) {
742 if (p.next == null) { p = p.next = new JJCalls(); break; }
743 p = p.next;
744 }
745 p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla;
746 }
747
748 static final class JJCalls {
749 int gen;
750 Token first;
751 int arg;
752 JJCalls next;
753 }
754
755 }